* compute summary statistics reported in Figure 1; Tables 1 and 2; Appendix Figure 1; Appendix Tables 4 and 5

* Note: numbers for Figure 1 and Table 2 are saved in csv files

clear all
set type double
log using summary_statistics, replace text

* macro for demographic group
global demo = "urban old income_group"

* macro for vehicle identifier
global vehicle = "make model series drive_type fuel_type body_style liter"

******************
* Figure 1
******************

*********
* read in data, create vehicle/group identifiers and compute sales by vehicle
*********

* load data
use "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Data\supply_estimation_results", clear

* demographic and vehicle variables
egen demo = group($demo)
egen vehicle = group($vehicle)

* aggregate sales across demographic groups
sort vehicle market_ID demo
by vehicle market_ID: egen vsales = sum(sales)

* keep one observation per vehicle
sort vehicle market_ID
drop if vehicle==vehicle[_n-1] & market_ID==market_ID[_n-1]

*********
* compute means of attributes
*********

* log horsepower-weight
gen ln_hw = ln(hp/weight)

* compute sales weights
sort market_ID truck vehicle
by market_ID truck: egen wt = sum(vsales)
replace wt = vsales/wt

* compute means
sort market_ID truck vehicle
foreach kind in mpg trans_price ln_hw {
	by market_ID truck: egen av_`kind' = sum(wt*`kind')
}


*********
* collapse data set and save data for figure
*********

sort market_ID truck vehicle
drop if market_ID==market_ID[_n-1] & truck==truck[_n-1]
keep market_ID truck av_*
sort truck market_ID

export delimited using "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Replication Results\figure1", replace

******************
* Table 1
******************

***********
* load data and create variables not already included in data set
***********

* load data
use "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Data\supply_estimation_results", clear

* ratio of horsepower to weight
gen hp_w = hp/weight

* dummies for hybrid, plug-in hybrid, and electric
gen hybrid = fuel_type=="Hybrid"
gen phev = fuel_type=="Plug-in Hybrid"
gen ev = fuel_type=="Electric"

* dummy for cars
gen car = truck==0

* dummy for SUVs
gen suv = body=="Sport Utility"

* dummy for all-wheel drive
gen awd = drive=="All Wheel Drive"

***********
* column 1: full sample
***********

summ trans_price mpg cpm hp_w footprint hybrid phev ev car suv awd [aw=sales]

***********
* columns 2-6: by income group
***********

sort income_group
by income_group: summ trans_price mpg cpm hp_w footprint hybrid phev ev car suv awd [aw=sales]

******************
* Table 2
******************

* load data
use "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Data\supply_estimation_results", clear

* ratio of horsepower to weight
gen hp_w = hp/weight

* keep 2015 data
keep if market_ID == 2015

* collapse data set to year-vehicle
collapse trans_price mpg hp_w (sum) sales, by(market_ID $vehicle)

* compute sales weight and sales share (as percentage)
egen sales_market = sum(sales)
gen sales_share = 100*sales/sales_market 

* create dummy variable for multiple engine versions
bysort make model series fuel_type drive_type body_style: gen engine_ID = _n
bysort make model series fuel_type drive_type body_style: egen sum_sales_twins = sum(sales)
bysort make model series fuel_type drive_type body_style: egen engine_count = max(engine_ID)
gen enginetwin = 0
replace enginetwin = 1 if engine_count > 1

* keep selected sample of twins to report in table
gen selected_sample = 0
replace selected_sample = 1 if make == "JEEP" & model == "CHEROKEE" & series == "LATITUDE" & drive_type == "All Wheel Drive" & enginetwin == 1 
replace selected_sample = 1 if make == "FORD" & model == "F SERIES" & series == "NSI" & fuel_type == "Flexible (gasoline/ethanol)" & drive_type == "4X4 or Front Wheel Drive" & enginetwin == 1 
replace selected_sample = 1 if make == "NISSAN" & model == "ALTIMA" & series == "NSI" & enginetwin == 1 
replace selected_sample = 1 if make == "TOYOTA" & model == "CAMRY" & series == "XLE" & drive_type == "Front Wheel Drive" & enginetwin == 1 
replace selected_sample = 1 if make == "KIA" & model == "SORENTO" & series == "LX" & drive_type == "All Wheel Drive" & enginetwin == 1 
keep if selected_sample == 1

* create twin ID number corresponding to order reported in table
gen twin_ID = 1 if make == "TOYOTA" & model == "CAMRY" & series == "XLE"
replace twin_ID = 2 if make == "NISSAN" & model == "ALTIMA"
replace twin_ID = 3 if make == "FORD" & model == "F SERIES"
replace twin_ID = 4 if make == "KIA" & model == "SORENTO"
replace twin_ID = 5 if make == "JEEP" & model == "CHEROKEE" & series == "LATITUDE"

* save numbers for table
keep $vehicle twin_ID trans_price mpg hp_w sales sales_share 
sort twin_ID liter
export delimited using "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Replication Results\table2", replace

******************
* Appendix Figure 1
******************

* load data
use "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Data\supply_estimation_results", clear

* keep 2012
keep if market_ID==2012

* demographic and vehicle variables
egen demo = group($demo)
egen vehicle = group($vehicle)

* compute total sales by vehicle
sort vehicle demo
by vehicle: egen vsales = sum(sales)

* keep one observation per vehicle
sort vehicle demo
drop if vehicle==vehicle[_n-1]
drop sales

* compute standards
gen a_2012 = 35.95 if truck==0
gen b_2012 = 27.95 if truck==0
gen c_2012 = 0.0005308 if truck==0
gen d_2012 = 0.006057 if truck==0
replace a_2012 = 29.82 if truck==1
replace b_2012 = 22.27 if truck==1
replace c_2012 = 0.0004546 if truck==1
replace d_2012 = 0.0149 if truck==1
gen a_2022 = 50.24 if truck==0
gen b_2022 = 37.59 if truck==0
gen c_2022 = 0.000447 if truck==0
gen d_2022 = 0.00159 if truck==0
replace a_2022 = 40.31 if truck==1
replace b_2022 = 26.02 if truck==1
replace c_2022 = 0.000499 if truck==1
replace d_2022 = 0.00436 if truck==1
qui for X in num 2012 2022: gen standard_X = 0.8/(min(max(c_X*footprint + d_X, 1/a_X), 1/b_X))

* save data for plotting
keep vehicle mpg truck footprint standard_*
sort truck vehicle
export delimited using "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Replication Results\app_figure1", replace

******************
* Appendix Tables 4 and 5
******************

*******
* get APR by demographic group
*******

* load data
use "L:\Project-MaritzCX\Workspace1\CAFE ex post\SW\Data assembly\Cleaned data\apr_mean_sd_by_market_demo", clear

* harmonize demographic variables
gen old = young==0
gen income_group = 1 if hhinc=="<44k"
replace income_group = 2 if hhinc=="44-91k"
replace income_group = 3 if hhinc=="91-123k"
replace income_group = 4 if hhinc=="123-185k"
replace income_group = 5 if hhinc==">185k"

* keep APR by group and year
rename apr_mean apr
keep $demo market_ID apr
sort $demo market_ID
save temp, replace

*******
* merge main data set with APR and compute statistics
*******

* load data
use "L:\Project-MaritzCX\Workspace1\Public\CAFE Ex Post Replication\Data\supply_estimation_results", clear

* merge with APR
sort $demo market_ID $vehicle
merge m:1 $demo market_ID using temp
keep if _merge--3
drop _merge

* create dummy for hybrid or plug-in
gen hyb_plug = fuel_type=="Hybrid" | fuel_type=="Plug-in Hybrid" | fuel_type=="Electric"

* ratio of horsepower to weight
gen hp_w = hp/weight

* compute 2016 fuel economy requirements
gen a_2016 = 41.09 if truck==0
gen b_2016 = 30.96 if truck==0
gen c_2016 = 0.0005308 if truck==0
gen d_2016 = 0.002573 if truck==0
replace a_2016 = 34.42 if truck==1
replace b_2016 = 24.74 if truck==1
replace c_2016 = 0.0004546 if truck==1
replace d_2016 = 0.010413 if truck==1
gen standard_2016 = 1/(min(max(c_2016*footprint + d_2016, 1/a_2016), 1/b_2016))

* compute ratio of used sales to market size
gen share_used = used_sales/mkt_size

* report summary statistics, first for rural then for urban
sort old income_group
by old income_group: summ trans_price apr truck hyb_plug mpg hp_w footprint standard_2016 share_used if urban==0 [aw=sales]
by old income_group: summ trans_price apr truck hyb_plug mpg hp_w footprint standard_2016 share_used if urban==1 [aw=sales]


log close
